---
title: "Text analysis Indivisibility"
author: "Vinh Pham"
date: "2023-08-06"
output: html_document
---
---
title: "Indivisibility Data Analysis"
author: "Vinh Pham"
date: '2023-4-28'
output:
  pdf_document: default
  html_document: default
---


```{r}
# library(haven) #read data
library(tinytex)
library(dplyr)
library(tidyverse) #function %>%
library(tm)
library(SnowballC)
library(wordcloud)

```
# Text analysis (Figure A.5)

```{r}
# ---- Settings ----

# CSV with the chat messages; the `Translation` column holds the text that is
# analysed below.
chat_path <- "C:/Users/vinht/Desktop/Barg w Indv/Chat texts 2.csv"

# Number of most frequent terms handed to each word cloud.
top_n_terms <- 75

# Additional words dropped (on top of tm's English stop words) for the second
# word cloud.
extra_stopwords <- c(
  "seven", "eight", "nine", "twelve", "thirteen", "zero", "eleven",
  "fourteen", "four", "two", "ten", "hello", "thank", "please", "lets",
  "sixteen", "eighteen", "sorry", "want", "will", "can", "get", "dont",
  "think", "cant", "like", "thats", "make", "doesnt", "six", "’s", "isnt"
)

# ---- Helpers ----

#' Count how often each term occurs in a set of messages.
#'
#' The text is whitespace-normalised, lower-cased, stripped of punctuation and
#' of English stop words before counting.
#'
#' @param text Character vector, one element per message.
#' @param extra_stopwords Further words to remove after the standard English
#'   stop words; the default removes nothing extra.
#' @return A data frame with columns `term` and `count`, sorted by descending
#'   `count`.
term_counts <- function(text, extra_stopwords = character(0)) {
  corpus <- SimpleCorpus(VectorSource(text))
  corpus <- tm_map(corpus, stripWhitespace)
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  if (length(extra_stopwords) > 0) {
    corpus <- tm_map(corpus, removeWords, extra_stopwords)
  }

  totals <- colSums(as.matrix(DocumentTermMatrix(corpus)))
  counts <- data.frame(
    term = as.character(names(totals)),
    count = unname(totals),
    stringsAsFactors = FALSE
  )
  arrange(counts, desc(count))
}

#' Draw a word cloud on the active graphics device.
#'
#' @param top_terms Data frame with columns `term` and `count`.
#' @return Called for its side effect (the plot).
draw_cloud <- function(top_terms) {
  # NOTE(review): min.freq is compared against the raw counts, so any term
  # that occurs fewer than 1000 times is not plotted -- confirm this threshold
  # is intended for this data set.
  wordcloud(
    words = top_terms$term, freq = top_terms$count, min.freq = 1000,
    max.words = 100, random.order = FALSE, rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
}

#' Render a word cloud into a PNG file.
#'
#' wordcloud() draws with base graphics and is not a ggplot object, so
#' ggsave() cannot write it; the cloud is drawn into a png() device instead.
#'
#' @param top_terms Data frame with columns `term` and `count`.
#' @param file Path of the PNG file to write.
#' @param width,height Size of the image in inches.
#' @return `file`, invisibly.
save_cloud <- function(top_terms, file, width = 5, height = 5) {
  png(file, width = width, height = height, units = "in", res = 300)
  # Close the device even if drawing fails, so no half-open device is left.
  on.exit(dev.off(), add = TRUE)
  draw_cloud(top_terms)
  invisible(file)
}

# ---- Word cloud 1: English stop words removed ----

chat_data <- read.csv(chat_path)
chat_text <- iconv(chat_data$Translation, from = "WINDOWS-1252", to = "UTF-8")

# head() returns at most `top_n_terms` rows and never pads with NA rows when
# fewer terms are available.
top_terms_all <- head(term_counts(chat_text), top_n_terms)
draw_cloud(top_terms_all)

```

```{r}
# ---- Word cloud 2: extra stop words removed as well ----

top_terms_filtered <- head(
  term_counts(chat_text, extra_stopwords),
  top_n_terms
)
draw_cloud(top_terms_filtered)

```

```{r}
# Write both clouds to disk (5 x 5 inches). Word placement is randomised by
# wordcloud(), so the saved layout can differ from the one rendered above.
save_cloud(top_terms_all, "chatimg1.png")
save_cloud(top_terms_filtered, "chatimg2.png")

```